Linux Cubed Series 8: LINUX Games

home *** CD-ROM | disk | FTP | other *** search

/ Linux Cubed Series 8: LINUX Games / Linux Cubed Series 8 - LINUX Games.iso / games / x11 / rpg / crossfir.92 / crossfir / crossfire-0.92.5 / common / re-cmp.c < prev next >

Wrap

C/C++ Source or Header | 1996-07-24 | 12KB | 529 lines

/* * static char *rcsid_player_c = * "$Id: re-cmp.c,v 1.4 1994/03/29 08:26:45 master Exp $"; */ /* re-cmp.c * Pattern match a string, parsing some of the common RE-metacharacters. * * This code is public domain, but I would appreciate to hear of * improvements or even the fact that you use it in your program. * * Deliberate BUGS: * - These tokens are not supported: | ( ) * - You will get the longest expansion of the _first_ string which * matches the RE, not the longest string which would be the proper * behaviour for a RE-matcher. * * Author: Kjetil T. Homme <kjetilho@ifi.uio.no> May 1993 */ #include <stdio.h> #include <stdlib.h> #include <memory.h> #include <limits.h> #include <re-cmp.h> /* Get prototype functions to prevent warnings. */ #if defined (__sun__) && defined(StupidSunHeaders) # include <sys/types.h> # include <sys/time.h> # include "sunos.h" /* Prototypes for standard libraries, sunos lack those */ #endif /* P r o t o t y p e s */ char *re_cmp(char *, char *); static Boolean re_cmp_step(char *, char *, int, int); static void re_init(void); static Boolean re_match_token(uchar, selection *); static char *re_get_token(selection *, char *); #ifdef DEBUG2 static void re_dump_sel(selection *); #endif /* G l o b a l v a r i a b l e s */ static Boolean re_init_done = False; static selection *re_token[RE_TOKEN_MAX]; static char *re_substr[RE_TOKEN_MAX]; static unsigned int re_token_depth; /* E x t e r n a l f u n c t i o n */ /* re-cmp - get regular expression match. * Return values: NULL - no match or error in regexp. * pointer to beginning of matching string */ char * re_cmp(char *str, char *regexp) { char *next_regexp; Boolean once = False; Boolean matched; if (re_init_done == False) re_init(); #ifdef SAFE_CHECKS if (regexp == NULL || str == NULL) return NULL; #endif if (*regexp == '^') { once = True; ++regexp; } if (*regexp == 0) { /* // or /^/ matches any string */ return str; } next_regexp = re_get_token(re_token[0], regexp); re_token_depth = 0; re_substr[0] = next_regexp; matched = re_match_token(*str, re_token[0]); if (matched && *next_regexp == 0) return str; /* Apologies for the nearly duplicated code below, hopefully it * speeds things up. */ if (once) { switch (re_token[0]->repeat) { case rep_once: if (matched == False) return NULL; break; case rep_once_or_more: if (matched == False) return NULL; if (re_cmp_step(str+1, regexp, 0, 1)) return str; break; case rep_null_or_once: if (matched == False) return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; break; case rep_null_or_more: if (matched) { if (re_cmp_step(str+1, regexp, 0, 1)) return str; } else { return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL; } break; } return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL; } if (matched) { switch (re_token[0]->repeat) { case rep_once: case rep_null_or_once: break; case rep_once_or_more: case rep_null_or_more: if (re_cmp_step(str+1, regexp, 0, 1)) return str; break; } if (re_cmp_step(str+1, next_regexp, 1, 0)) return str; } do { ++str; if (re_cmp_step(str, regexp, 0, 0)) return str; } while (*str); return NULL; } /* A u x i l l i a r y f u n c t i o n s */ static Boolean re_cmp_step(char *str, char *regexp, int slot, int matches) { /* str - string to match * regexp - pattern * slot - number of the token which under consideration * matches - how many times the token has matched */ char *next_regexp; Boolean matched; #ifdef DEBUG /* fprintf(stderr, "['%s', '%s', %d, %d]\n", str, regexp, slot, matches);*/ #endif if (*regexp == 0) { /* When we reach the end of the regexp, the match is a success */ return True; } /* This chunk of code makes sure that the regexp-tokenising happens * only once. We only tokenise as much as we need. */ if (slot > re_token_depth) { re_token_depth = slot; if (re_token[slot] == NULL) re_token[slot] = (selection *) malloc(sizeof(selection)); next_regexp = re_get_token(re_token[slot], regexp); if (next_regexp == NULL) { /* Syntax error, what else can we do? */ return False; } re_substr[slot] = next_regexp; } else { next_regexp = re_substr[slot]; } matched = re_match_token(*str, re_token[slot]); if (matched) ++matches; if (*str == 0) return (*next_regexp == 0 || re_token[slot]->type == sel_end); switch (re_token[slot]->repeat) { case rep_once: if (matches == 1) { /* (matches == 1) => (matched == True) */ return re_cmp_step(str+1, next_regexp, slot+1, 0); } return False; case rep_once_or_more: if (matched) { /* (matched == True) => (matches >= 1) */ /* First check if the current token repeats more */ if (re_cmp_step(str+1, regexp, slot, matches)) return True; return re_cmp_step(str+1, next_regexp, slot+1, 0); } return False; case rep_null_or_once: /* We must go on to the next token, but should we advance str? */ if (matches == 0) { return re_cmp_step(str, next_regexp, slot+1, 0); } else if (matches == 1) { return re_cmp_step(str+1, next_regexp, slot+1, 0); } return False; /* Not reached */ case rep_null_or_more: if (matched) { /* Look for further repeats, advance str */ if (re_cmp_step(str+1, regexp, slot, matches)) return True; return re_cmp_step(str, next_regexp, slot+1, 0); } return re_cmp_step(str, next_regexp, slot+1, 0); } return False; } static void re_init(void) { int i; re_token[0] = (selection *) malloc(sizeof(selection)); for (i = 1; i < RE_TOKEN_MAX; i++) re_token[i] = NULL; re_init_done = True; } static Boolean re_match_token(uchar c, selection *sel) { switch (sel->type) { case sel_any: return True; case sel_end: return (c == 0); case sel_single: return (c == sel->u.single); case sel_range: return (c >= sel->u.range.low && c <= sel->u.range.high); case sel_array: return (sel->u.array[c]); case sel_not_single: return (c != sel->u.single); case sel_not_range: return (c < sel->u.range.low && c > sel->u.range.high); } return False; } /* re_get_token - get regular expression token * Returns the first token found in <regexp> in <sel> * Return values: NULL syntax error * pointer to first character past token. */ static char * re_get_token(selection *sel, char *regexp) { #ifdef SAFE_CHECKS # define exit_if_null if (*regexp == 0) return NULL #else # define exit_if_null #endif Boolean quoted = False; uchar looking_at; #ifdef SAFE_CHECKS if (sel == NULL || regexp == NULL || *regexp == 0) return NULL; #endif do { looking_at = *regexp++; switch (looking_at) { case '$': if (quoted) { quoted = False; sel->type = sel_single; sel->u.single = looking_at; } else { sel->type = sel_end; } break; case '.': if (quoted) { quoted = False; sel->type = sel_single; sel->u.single = looking_at; } else { sel->type = sel_any; } break; case '[': /* The fun stuff... perhaps a little obfuscated since I * don't trust the compiler to analyse liveness. */ if (quoted) { quoted = False; sel->type = sel_single; sel->u.single = looking_at; } else { Boolean neg = False; uchar first, last = 0; exit_if_null; looking_at = *regexp++; if (looking_at == '^') { neg = True; exit_if_null; looking_at = *regexp++; } first = looking_at; exit_if_null; looking_at = *regexp++; if (looking_at == ']') { /* On the form [q] or [^q] */ sel->type = neg ? sel_not_single : sel_single; sel->u.single = first; break; } else if (looking_at == '-') { exit_if_null; last = *regexp++; if (last == ']') { /* On the form [A-] or [^A-]. Checking for * [,-] and making it a range is probably not * worth it :-) */ sel->type = sel_array; memset(sel->u.array, neg, sizeof(sel->u.array)); sel->u.array[first] = sel->u.array['-'] = !neg; break; } else { exit_if_null; looking_at = *regexp++; if (looking_at == ']') { /* On the form [A-G] or [^A-G]. Note that [G-A] * is a syntax error. Fair enough, I think. */ #ifdef SAFE_CHECK if (first > last) return NULL; #endif sel->type = neg ? sel_not_range : sel_range; sel->u.range.low = first; sel->u.range.high = last; break; } } } { /* The datastructure can only represent a RE this * complex with an array. */ int i; uchar previous; sel->type = sel_array; memset(sel->u.array, neg, sizeof(sel->u.array)); if (last) { /* It starts with a range */ #ifdef SAFE_CHECK if (first > last) return NULL; #endif for (i = first; i <= last; i++) { sel->u.array[i] = !neg; } } else { /* It begins with a "random" character */ sel->u.array[first] = !neg; } sel->u.array[looking_at] = !neg; exit_if_null; previous = looking_at; looking_at = *regexp++; /* Add more characters to the array until we reach * ]. Quoting doesn't and shouldn't work in here. * ("]" should be put first, and "-" last if they * are needed inside this construct.) * Look for ranges as we go along. */ while (looking_at != ']') { if (looking_at == '-') { exit_if_null; looking_at = *regexp++; if (looking_at != ']') { #ifdef SAFE_CHECK if (previous > looking_at) return NULL; #endif for (i = previous+1; i < looking_at; i++) { /* previous has already been set and * looking_at is set below. */ sel->u.array[i] = !neg; } exit_if_null; } else { sel->u.array['-'] = !neg; break; } } sel->u.array[looking_at] = !neg; previous = looking_at; exit_if_null; looking_at = *regexp++; } } } break; case '\\': if (quoted) { quoted = False; sel->type = sel_single; sel->u.single = looking_at; } else { quoted = True; } break; default: quoted = False; sel->type = sel_single; sel->u.single = looking_at; break; } } while (quoted); if (*regexp == '*') { sel->repeat = rep_null_or_more; ++regexp; } else if (*regexp == '?') { sel->repeat = rep_null_or_once; ++regexp; } else if (*regexp == '+') { sel->repeat = rep_once_or_more; ++regexp; } else { sel->repeat = rep_once; } return regexp; } /* D e b u g c o d e */ #ifdef DEBUG2 /* compile all with DEBUG also ? hevi@lut.fi */ static void re_dump_sel(selection *sel) { switch(sel->type) { case sel_any: printf("."); break; case sel_end: printf("$"); break; case sel_single: printf("<%c>", sel->u.single); break; case sel_range: printf("[%c-%c]", sel->u.range.low, sel->u.range.high); break; case sel_array: { int i; printf("["); for (i = 0; i < UCHAR_MAX; i++) { if (sel->u.array[i]) { printf("%c", i); } } printf("]"); } break; case sel_not_single: printf("[^%c]", sel->u.single); break; case sel_not_range: printf("[^%c-%c]", sel->u.range.low, sel->u.range.high); break; default: printf("<UNKNOWN TOKEN!>"); break; } switch(sel->repeat) { case rep_once: break; case rep_null_or_once: printf("?"); break; case rep_null_or_more: printf("*"); break; case rep_once_or_more: printf("+"); break; default: printf("<UNKNOWN REP-TOKEN!>"); break; } } int main(int argc, char *argv[]) { char *re, *m; selection sel; re = re_get_token(&sel, argv[1]); printf("'%s' -> '%s'\n", argv[1], re); re_dump_sel(&sel); printf("\n"); m = re_cmp(argv[2], argv[1]); if (m) printf("MATCH! -> '%s'\n", m); return 0; } #endif